{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Read-in-parliamentary-data\" data-toc-modified-id=\"Read-in-parliamentary-data-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Read in parliamentary data</a></span></li><li><span><a href=\"#Operationalize-constructs\" data-toc-modified-id=\"Operationalize-constructs-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>Operationalize constructs</a></span><ul class=\"toc-item\"><li><span><a href=\"#As-simple-terms\" data-toc-modified-id=\"As-simple-terms-2.1\"><span class=\"toc-item-num\">2.1&nbsp;&nbsp;</span>As simple terms</a></span><ul class=\"toc-item\"><li><span><a href=\"#Schooling\" data-toc-modified-id=\"Schooling-2.1.1\"><span class=\"toc-item-num\">2.1.1&nbsp;&nbsp;</span>Schooling</a></span></li><li><span><a href=\"#Science\" data-toc-modified-id=\"Science-2.1.2\"><span class=\"toc-item-num\">2.1.2&nbsp;&nbsp;</span>Science</a></span></li></ul></li><li><span><a href=\"#Using-synonyms-from-Wordnet-library\" data-toc-modified-id=\"Using-synonyms-from-Wordnet-library-2.2\"><span class=\"toc-item-num\">2.2&nbsp;&nbsp;</span>Using synonyms from Wordnet library</a></span><ul class=\"toc-item\"><li><span><a href=\"#Schooling\" data-toc-modified-id=\"Schooling-2.2.1\"><span class=\"toc-item-num\">2.2.1&nbsp;&nbsp;</span>Schooling</a></span></li><li><span><a href=\"#Scientized-social-order\" data-toc-modified-id=\"Scientized-social-order-2.2.2\"><span class=\"toc-item-num\">2.2.2&nbsp;&nbsp;</span>Scientized social order</a></span></li></ul></li></ul></li><li><span><a href=\"#Code-speeches\" data-toc-modified-id=\"Code-speeches-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>Code speeches</a></span><ul class=\"toc-item\"><li><span><a href=\"#Schooling\" data-toc-modified-id=\"Schooling-3.1\"><span class=\"toc-item-num\">3.1&nbsp;&nbsp;</span>Schooling</a></span></li><li><span><a href=\"#Science\" data-toc-modified-id=\"Science-3.2\"><span class=\"toc-item-num\">3.2&nbsp;&nbsp;</span>Science</a></span></li></ul></li><li><span><a href=\"#Export-coded-speeches\" data-toc-modified-id=\"Export-coded-speeches-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>Export coded speeches</a></span></li></ul></div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:20:14.146750Z",
     "start_time": "2020-11-26T14:20:10.814300Z"
    }
   },
   "outputs": [],
   "source": [
    "from itertools import chain\n",
    "from nltk.corpus import wordnet\n",
    "from nltk.stem import PorterStemmer\n",
    "from nltk.stem import WordNetLemmatizer\n",
    "import string\n",
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read in parliamentary data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:20:27.237712Z",
     "start_time": "2020-11-26T14:20:16.975074Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>date</th>\n",
       "      <th>speaker</th>\n",
       "      <th>speech</th>\n",
       "      <th>chamber</th>\n",
       "      <th>year</th>\n",
       "      <th>length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Speaker</td>\n",
       "      <td>['acquaint', 'obedi', 'command', 'attend', 'pe...</td>\n",
       "      <td>lower</td>\n",
       "      <td>1803</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Hawkesbury</td>\n",
       "      <td>['move', 'walsingham', 'appoint', 'chairman', ...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Lord Chancellor</td>\n",
       "      <td>['second', 'took', 'opportun', 'pai', 'handsom...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Walsingham</td>\n",
       "      <td>['rose', 'observ', 'habit', 'trespass', 'atten...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Earl of Limerick</td>\n",
       "      <td>['rose', 'second', 'address', 'fulli', 'coinci...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>698</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0        date               speaker  \\\n",
       "0           0  1803-11-22           The Speaker   \n",
       "1           1  1803-11-22       Lord Hawkesbury   \n",
       "2           2  1803-11-22   The Lord Chancellor   \n",
       "3           3  1803-11-22       Lord Walsingham   \n",
       "4           4  1803-11-22  The Earl of Limerick   \n",
       "\n",
       "                                              speech chamber  year  length  \n",
       "0  ['acquaint', 'obedi', 'command', 'attend', 'pe...   lower  1803      22  \n",
       "1  ['move', 'walsingham', 'appoint', 'chairman', ...   upper  1803       5  \n",
       "2  ['second', 'took', 'opportun', 'pai', 'handsom...   upper  1803      24  \n",
       "3  ['rose', 'observ', 'habit', 'trespass', 'atten...   upper  1803      67  \n",
       "4  ['rose', 'second', 'address', 'fulli', 'coinci...   upper  1803     698  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uk = pd.read_csv(\"uk_terms.csv\")\n",
    "uk.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Re-tokenize pre-processed speeches. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:27:06.127409Z",
     "start_time": "2020-11-26T14:24:58.961005Z"
    }
   },
   "outputs": [],
   "source": [
    "uk['speech'] = uk['speech'].apply(eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:27:06.140856Z",
     "start_time": "2020-11-26T14:27:06.129252Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>date</th>\n",
       "      <th>speaker</th>\n",
       "      <th>speech</th>\n",
       "      <th>chamber</th>\n",
       "      <th>year</th>\n",
       "      <th>length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Speaker</td>\n",
       "      <td>[acquaint, obedi, command, attend, peer, hear,...</td>\n",
       "      <td>lower</td>\n",
       "      <td>1803</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Hawkesbury</td>\n",
       "      <td>[move, walsingham, appoint, chairman, privileg]</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Lord Chancellor</td>\n",
       "      <td>[second, took, opportun, pai, handsom, complim...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>Lord Walsingham</td>\n",
       "      <td>[rose, observ, habit, trespass, attent, take, ...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>The Earl of Limerick</td>\n",
       "      <td>[rose, second, address, fulli, coincid, sentim...</td>\n",
       "      <td>upper</td>\n",
       "      <td>1803</td>\n",
       "      <td>698</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0        date               speaker  \\\n",
       "0           0  1803-11-22           The Speaker   \n",
       "1           1  1803-11-22       Lord Hawkesbury   \n",
       "2           2  1803-11-22   The Lord Chancellor   \n",
       "3           3  1803-11-22       Lord Walsingham   \n",
       "4           4  1803-11-22  The Earl of Limerick   \n",
       "\n",
       "                                              speech chamber  year  length  \n",
       "0  [acquaint, obedi, command, attend, peer, hear,...   lower  1803      22  \n",
       "1    [move, walsingham, appoint, chairman, privileg]   upper  1803       5  \n",
       "2  [second, took, opportun, pai, handsom, complim...   upper  1803      24  \n",
       "3  [rose, observ, habit, trespass, attent, take, ...   upper  1803      67  \n",
       "4  [rose, second, address, fulli, coincid, sentim...   upper  1803     698  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uk.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:27:06.146789Z",
     "start_time": "2020-11-26T14:27:06.143527Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1293727"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uk.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Operationalize constructs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:27:58.133479Z",
     "start_time": "2020-11-26T14:27:58.130689Z"
    }
   },
   "outputs": [],
   "source": [
    "porter_stemmer = PorterStemmer()\n",
    "\n",
    "def stem_words(tokens):\n",
    "    '''\n",
    "    Takes a list of tokens;\n",
    "    Returns a list of stems.\n",
    "    '''\n",
    "    return [porter_stemmer.stem(token) for token in tokens]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### As simple terms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Schooling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:28:06.277643Z",
     "start_time": "2020-11-26T14:28:06.275210Z"
    }
   },
   "outputs": [],
   "source": [
    "schooling = ['schooling', 'schoolhouse', 'schools', \n",
    "             'schoolteacher', 'teacher', 'teaching', 'schoolmaster',\n",
    "             'pupil', 'student', 'literacy', 'child']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Science"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:28:15.310964Z",
     "start_time": "2020-11-26T14:28:15.308273Z"
    }
   },
   "outputs": [],
   "source": [
    "science = ['science', 'scientific', 'fact', 'factual',\n",
    "           'theory', 'theoretical', 'data', 'figure',\n",
    "           'evidence','statistics', 'empirical',\n",
    "           'average', 'sample', 'population','census', \n",
    "           'sociology', 'economics', 'psychology', 'rate', 'state']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using synonyms from Wordnet library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-08-18T20:16:14.061012Z",
     "start_time": "2020-08-18T20:16:14.052275Z"
    }
   },
   "outputs": [],
   "source": [
    "with open(\"to_delete.txt\", \"r\") as f:\n",
    "    distal_synonyms = f.read().splitlines()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Schooling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:28:41.474678Z",
     "start_time": "2020-11-26T14:28:39.292419Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "48\n",
      "school_day\n",
      "pupil\n",
      "nipper\n",
      "tiddler\n",
      "precept\n",
      "tyke\n",
      "child\n",
      "learn\n",
      "educational_act\n",
      "small_fri\n",
      "bookman\n",
      "tike\n",
      "schoolteach\n",
      "fri\n",
      "nestl\n",
      "schoolmast\n",
      "babi\n",
      "cultiv\n",
      "minor\n",
      "educate\n",
      "scholar\n",
      "schooltim\n",
      "command\n",
      "master\n",
      "train\n",
      "school-age_child\n",
      "civilis\n",
      "teach\n",
      "teacher\n",
      "didact\n",
      "civil\n",
      "instruct\n",
      "lutjanus_apodu\n",
      "school\n",
      "headmast\n",
      "schoolhous\n",
      "pedagogi\n",
      "shoal\n",
      "instructor\n",
      "schoolchild\n",
      "shaver\n",
      "kid\n",
      "scholarly_person\n",
      "educ\n",
      "school_teach\n",
      "literaci\n",
      "student\n",
      "youngster\n"
     ]
    }
   ],
   "source": [
    "schoolings = []\n",
    "for term in schooling: \n",
    "    schoolings.extend(wordnet.synsets(term))\n",
    "schoolings = set(chain.from_iterable([word.lemma_names() for word in schoolings]))\n",
    "schoolings = stem_words(schoolings)\n",
    "# schoolings = [term for term in schoolings if term not in distal_synonyms]\n",
    "schoolings = set(schoolings)\n",
    "print(len(schoolings))\n",
    "for term in schoolings:\n",
    "    print(term)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Scientized social order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:29:32.813835Z",
     "start_time": "2020-11-26T14:29:32.807139Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "124\n"
     ]
    }
   ],
   "source": [
    "sciences = []\n",
    "for term in science:\n",
    "    sciences.extend(wordnet.synsets(term))\n",
    "sciences = set(chain.from_iterable([word.lemma_names() for word in sciences]))\n",
    "sciences = stem_words(sciences)\n",
    "sciences = set(sciences)\n",
    "print(len(sciences))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Code speeches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:29:40.290365Z",
     "start_time": "2020-11-26T14:29:40.232487Z"
    }
   },
   "outputs": [],
   "source": [
    "uk_coded = uk[['date', 'year', 'chamber', 'length']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Schooling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:31:13.293440Z",
     "start_time": "2020-11-26T14:29:41.918683Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-15-6b3b7a80286c>:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  uk_coded[\"sch_{}\".format(keyword)] = uk['speech'].apply(lambda x: x.count(keyword))\n"
     ]
    }
   ],
   "source": [
    "for keyword in schoolings: \n",
    "    uk_coded[\"sch_{}\".format(keyword)] = uk['speech'].apply(lambda x: x.count(keyword))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:31:48.184792Z",
     "start_time": "2020-11-26T14:31:48.160022Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>year</th>\n",
       "      <th>chamber</th>\n",
       "      <th>length</th>\n",
       "      <th>sch_school_day</th>\n",
       "      <th>sch_pupil</th>\n",
       "      <th>sch_nipper</th>\n",
       "      <th>sch_tiddler</th>\n",
       "      <th>sch_precept</th>\n",
       "      <th>sch_tyke</th>\n",
       "      <th>...</th>\n",
       "      <th>sch_instructor</th>\n",
       "      <th>sch_schoolchild</th>\n",
       "      <th>sch_shaver</th>\n",
       "      <th>sch_kid</th>\n",
       "      <th>sch_scholarly_person</th>\n",
       "      <th>sch_educ</th>\n",
       "      <th>sch_school_teach</th>\n",
       "      <th>sch_literaci</th>\n",
       "      <th>sch_student</th>\n",
       "      <th>sch_youngster</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>1803</td>\n",
       "      <td>lower</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>1803</td>\n",
       "      <td>upper</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>1803</td>\n",
       "      <td>upper</td>\n",
       "      <td>24</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>1803</td>\n",
       "      <td>upper</td>\n",
       "      <td>67</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1803-11-22</td>\n",
       "      <td>1803</td>\n",
       "      <td>upper</td>\n",
       "      <td>698</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 52 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         date  year chamber  length  sch_school_day  sch_pupil  sch_nipper  \\\n",
       "0  1803-11-22  1803   lower      22               0          0           0   \n",
       "1  1803-11-22  1803   upper       5               0          0           0   \n",
       "2  1803-11-22  1803   upper      24               0          0           0   \n",
       "3  1803-11-22  1803   upper      67               0          0           0   \n",
       "4  1803-11-22  1803   upper     698               0          0           0   \n",
       "\n",
       "   sch_tiddler  sch_precept  sch_tyke  ...  sch_instructor  sch_schoolchild  \\\n",
       "0            0            0         0  ...               0                0   \n",
       "1            0            0         0  ...               0                0   \n",
       "2            0            0         0  ...               0                0   \n",
       "3            0            0         0  ...               0                0   \n",
       "4            0            0         0  ...               0                0   \n",
       "\n",
       "   sch_shaver  sch_kid  sch_scholarly_person  sch_educ  sch_school_teach  \\\n",
       "0           0        0                     0         0                 0   \n",
       "1           0        0                     0         0                 0   \n",
       "2           0        0                     0         0                 0   \n",
       "3           0        0                     0         0                 0   \n",
       "4           0        0                     0         0                 0   \n",
       "\n",
       "   sch_literaci  sch_student  sch_youngster  \n",
       "0             0            0              0  \n",
       "1             0            0              0  \n",
       "2             0            0              0  \n",
       "3             0            0              0  \n",
       "4             0            0              0  \n",
       "\n",
       "[5 rows x 52 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uk_coded.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Science"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:36:00.141782Z",
     "start_time": "2020-11-26T14:31:59.861021Z"
    }
   },
   "outputs": [],
   "source": [
    "for keyword in sciences:\n",
    "    uk_coded[\"sci_{}\".format(keyword)] = uk['speech'].apply(lambda x: x.count(keyword))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export coded speeches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:36:18.623147Z",
     "start_time": "2020-11-26T14:36:18.617565Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1293727, 176)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uk_coded.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-26T14:37:01.563312Z",
     "start_time": "2020-11-26T14:36:28.899376Z"
    }
   },
   "outputs": [],
   "source": [
    "uk_coded.to_csv(\"uk_coded_new.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
